Reading in two datasets and adjusting/filtering based on given conditions
library(tidyverse)
##
# County-level presidential election returns covering 2000-2020.
election <- read.csv(
  file = "https://raw.githubusercontent.com/shakirovb/STA553/main/countypresidential_election_2000-2020.csv"
)
# Lookup table keyed by county FIPS code; joined in later to attach
# geographic information to each county.
election.fips <- read.csv(
  file = "https://raw.githubusercontent.com/shakirovb/STA553/main/fips2geocode.csv"
)
##
# Left-pad the FIPS codes in both tables with zeros to a width of five
# characters so the two key columns share one format and join correctly.
election[["county_fips"]] <- formatC(
  election[["county_fips"]],
  width = 5,
  flag = "0"
)
election.fips[["fips"]] <- formatC(
  election.fips[["fips"]],
  width = 5,
  flag = "0"
)
##
# San Joaquin County's 2020 total vote count is recorded as NA in the source
# data, so fill in the figure by hand.
# Indexed assignment is used instead of ifelse(): which() drops rows where the
# condition evaluates to NA (e.g. a missing county_name), whereas ifelse()
# would return NA for those rows and wipe out their existing totalvotes.
election$totalvotes[
  which(election$year == 2020 & election$county_name == "SAN JOAQUIN")
] <- 165130
##
# FIPS 46113 was formerly known as Shannon County; give it its current name.
# The fips column has already been converted to a zero-padded character
# vector, so it is matched against a character literal rather than a number
# (which only worked through implicit number-to-string coercion). Indexed
# assignment changes the matching rows only and leaves all others untouched.
election.fips$county[
  which(election.fips$fips == "46113")
] <- "Oglala Lakota County"
##
# Build the 2020 two-party vote totals:
#   * keep only rows from the 2020 election;
#   * keep only Democratic and Republican rows;
#   * some counties have several rows for one candidate, so sum
#     candidatevotes to get one total per state / county / party.
election2020 <- election %>%
  filter(
    year == 2020,
    party %in% c("DEMOCRAT", "REPUBLICAN")
  ) %>%
  group_by(state, county_name, party) %>%
  # .groups = "drop" returns an ungrouped result explicitly instead of relying
  # on summarise()'s default of keeping a residual (state, county_name)
  # grouping and emitting a message about it.
  summarise(votes = sum(candidatevotes), .groups = "drop") %>%
  as.data.frame() # convert from tibble to a plain data.frame
##
# The summed table holds only state, county name, party and votes, so it is
# joined back onto the 2020 rows of the full election table to recover the
# FIPS code, state abbreviation and total votes. Because this is a right join,
# every 2020 row of `election` is kept; rows whose party was not summarised
# above get NA votes (and therefore NA percentage).
election2020 <- election2020 %>%
  right_join(
    filter(election, year == 2020),
    by = c("state", "county_name", "party")
  ) %>%
  arrange(county_fips) %>%
  # Share of the county's total votes won by this row's party.
  mutate(percentage = votes / totalvotes) %>%
  select(state_po, county_name, county_fips, party, votes, percentage)
# Reduce to one row per county: the row with the highest vote share, i.e. the
# winning party, with that share stored as `winpercent`.
#
# slice_max() picks the winning row directly. This avoids computing
# max(..., na.rm = TRUE) per county (which yields -Inf plus a warning when a
# county has no usable percentage) and then recovering the row by joining on
# floating-point equality. Rows with a missing percentage are dropped first,
# so counties without any valid percentage are simply absent from the result.
electionwin2020 <- election2020 %>%
  filter(!is.na(percentage)) %>%
  group_by(county_fips) %>%
  # with_ties = FALSE keeps exactly one row per county even when several rows
  # share the maximum (e.g. repeated rows for the same candidate).
  slice_max(percentage, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  # Rename the share and put the key columns first.
  select(county_fips, winpercent = percentage, everything()) %>%
  # Attach the geographic information for each county.
  inner_join(election.fips, by = c("county_fips" = "fips"))
dim(electionwin2020)
## [1] 3074 12
Choropleth Map set-up
library(plotly)
# Assemble the per-county hover label shown on the map: county name, state,
# winning party, winning vote share (as a percentage) and the vote count.
# The pieces are built as locals inside with(), so nothing leaks into the
# global environment.
electionwin2020[["hover"]] <- with(electionwin2020, {
  state_label <- str_to_title(rMapState)
  winner_label <- str_to_title(party)
  # abs() so the label is positive whatever the sign of winpercent.
  share_label <- round(abs(winpercent), 4) * 100
  votes_label <- formatC(votes, big.mark = ",")
  paste(
    county_name,
    "<br>", state_label,
    "<br>", "Winner:", winner_label,
    "<br>", "Win_Percentage:", share_label, "%",
    "<br>", "Number_of_Votes", votes_label
  )
})
# Location of the GeoJSON file holding the geocode that defines the county
# boundaries drawn in the choropleth map.
url <- "https://github.com/pengdsci/sta553/raw/main/data/geojson-counties-fips.json"
# Parse the GeoJSON into an R list.
counties <- rjson::fromJSON(file = url)
# Geo layout options for the choropleth: a USA-scoped Albers projection with
# country and sub-unit borders drawn in black and lakes filled in white.
# Logical flags are spelled TRUE rather than T, because T is an ordinary
# variable that can be reassigned.
geo.apply <- list(
  scope = "usa",
  projection = list(type = "albers usa"),
  showcountries = TRUE,
  countrycolor = toRGB("Black"),
  showsubunits = TRUE,
  subunitcolor = toRGB("Black"),
  showlakes = TRUE,
  lakecolor = toRGB("white")
)
###
# County-level choropleth of the 2020 result.
#
# The colour axis runs from zmin = -1 to zmax = 1 on a blue-to-red scale, so
# the plotted value must carry the winner in its sign: Democratic wins are
# mapped to -share (blue half) and Republican wins to +share (red half).
# Plotting the raw, always-positive winpercent would place every county in the
# upper (red) half of the scale regardless of who won. abs() makes the
# signing safe even if winpercent has already been signed upstream.
fig <- plot_ly() %>%
  add_trace(
    type = "choropleth",
    geojson = counties,
    locations = electionwin2020$county_fips,
    z = with(
      electionwin2020,
      ifelse(party == "DEMOCRAT", -1, 1) * abs(winpercent)
    ),
    colorscale = "bluered", # "RdBu"
    zmin = -1,
    zmax = 1,
    text = electionwin2020$hover, # hover message
    # Show only the custom text; the empty <extra> tag hides the trace box.
    hovertemplate = paste('<i><b>%{text}',
                          '<extra></extra>'),
    marker = list(line = list(width = 0.2)),
    showscale = FALSE
  ) %>%
  layout(
    title = "2020 Presidential Election",
    geo = geo.apply
  )
fig